import re
import requests

# HTTP headers sent with every page request.  The User-Agent value is a
# desktop Chrome identification string (presumably so the site serves the
# normal HTML page instead of rejecting the default client -- TODO confirm).
head = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/107.0.0.0 Safari/537.36"
    ),
}


def _first_match(pattern, text, default=""):
    """Return group 1 of the first match of *pattern* in *text*, else *default*."""
    found = re.search(pattern, text)
    return found.group(1) if found else default


def getbooklists(url, num, timeout=10):
    """Download one listing page and append each book on it to the output file.

    Every ``<td valign="top">`` cell of the page is treated as one book.  The
    title, description line, rating and number of ratings are extracted with
    regular expressions and written as a numbered record, followed by the
    one-line quote (``<span class="inq">``) when the cell has one.

    The function writes through the module-level file object ``f`` and sends
    the module-level ``head`` request headers; both must exist when it is
    called.

    Args:
        url: Address of the listing page to fetch.
        num: Sequence number given to the first book on the page; it is
            incremented by one for each following book.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot hang the script indefinitely.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched
            (including when the timeout expires).
    """
    html = requests.get(url, headers=head, timeout=timeout).text

    # re.S lets a cell span several lines; the per-field patterns below are
    # applied without it, so "." stays within a single line there.
    cells = re.findall(r'<td valign="top">(.*?)</td>', html, re.S)

    for offset, cell in enumerate(cells):
        # A field missing from the markup becomes an empty value rather than
        # raising IndexError and aborting the run with a half-written file.
        title = _first_match(r'title="(.*?)"', cell)
        summary = _first_match(r'<p class="pl">(.*?)</p>', cell).replace("/", "|")
        # Lazy match, consistent with the other patterns, so that a second
        # </span> on the same line cannot be swallowed into the rating.
        rating = _first_match(r'<span class="rating_nums">(.*?)</span>', cell)
        # The count sits on its own indented line; drop the padding spaces.
        votes = _first_match(
            r'<span class="pl">\(\n(.*?)人评价\n.*\)</span>', cell
        ).replace(" ", "")

        record = [
            f"第{num + offset}本书",
            "书名：" + title,
            "简介：" + summary,
            "评分：" + rating,
            "评价人数：" + votes,
        ]

        # None (not "") as the default keeps "no quote element" distinct from
        # "quote element present but empty".
        quote = _first_match(r'<span class="inq">(.*?)</span>', cell, default=None)
        if quote is not None:
            record.append("引用语：" + quote)

        # Each record ends with a blank line separating it from the next.
        f.write("\n".join(record) + "\n\n")


# Fetch the ten listing pages (start offsets 0, 25, ..., 225) and append all
# books to a single text file.  The handle has to stay bound to the
# module-level name `f`, because getbooklists() writes through that global.
with open(
    "D:/JetBrains Projects/Pycharm Projects/python-learn/爬虫作业/豆瓣图书/豆瓣图书.txt",
    mode="a",
    encoding="UTF-8",
) as f:
    for start in range(0, 250, 25):
        url = f"https://book.douban.com/top250?start={start}"
        # Books are numbered from 1, so the first book of a page is start + 1.
        getbooklists(url, start + 1)
